# coding:utf-8

import threading

try:
    import queue
except ImportError:  # Python 2 names the module `Queue`
    import Queue as queue

import requests

from log import Logger


class ThreadSpider(threading.Thread):
    """Worker thread that downloads pages and queues their raw bodies.

    Page numbers are taken from ``page_queue`` until it is drained; the
    response body of each downloaded page is put on ``data_queue``.

    Args:
        name: Thread name, used in the start/finish progress messages.
        page_queue: Queue of page numbers to download.
        data_queue: Queue that receives each response body (``bytes``).
        timeout: Seconds to wait for the server before giving up on a
            request. Without a timeout a stalled connection would block
            the worker forever.
    """

    DEFAULT_TIMEOUT = 10

    def __init__(self, name, page_queue, data_queue, timeout=DEFAULT_TIMEOUT):
        super(ThreadSpider, self).__init__()
        self.name = name
        self.page_queue = page_queue
        self.data_queue = data_queue
        self.timeout = timeout
        self.logger = Logger()
        self.headers = {"User-Agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}

    def _page_url(self, page):
        """Return the URL for ``page``, left-padding the number to 3 digits."""
        return 'https://www.pengfu.com/content_1795' + str(page).rjust(3, '0') + '_1.html'

    def run(self):
        """Drain ``page_queue``, pushing each downloaded body to ``data_queue``."""
        print('线程%s开始采集数据' % self.name)
        while True:
            # Checking empty() and then calling get() is racy when several
            # workers share the queue: another thread may take the last item
            # in between. A non-blocking get() plus queue.Empty is the only
            # reliable "is there more work?" test.
            try:
                page = self.page_queue.get(False)
            except queue.Empty:
                break

            url = self._page_url(page)
            try:
                html = requests.get(url, headers=self.headers, timeout=self.timeout)
                self.data_queue.put(html.content)
            except IOError as err:
                # requests' RequestException derives from IOError, so this
                # also covers connection errors and timeouts.
                self.logger.debug('IOError in ThreadSpider.py: %s (%s)' % (url, err))
            finally:
                # Mark the page finished only after it has been handled, so
                # page_queue.join() does not return while results are still
                # missing from data_queue.
                self.page_queue.task_done()

        print('线程%s完成数据采集' % self.name)
